# Training coreset: bankruptcy data with a binary response in `class`
coreset_df <- read.csv('../data/coreset_train.csv')
coreset_df
# Class balance check — heavily imbalanced (267 negatives vs 4 positives)
table(coreset_df$class)

  0   1 
267   4 
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ───────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.2     ✓ purrr   0.3.4
✓ tibble  3.0.3     ✓ dplyr   1.0.1
✓ tidyr   1.1.1     ✓ stringr 1.4.0
✓ readr   1.3.1     ✓ forcats 0.5.0
── Conflicts ──────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(caret)
Loading required package: lattice
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     

Attaching package: ‘caret’

The following object is masked from ‘package:purrr’:

    lift
library(GGally)
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
library(ggplot2)
library(corrplot)
corrplot 0.84 loaded
library(bayesplot)
This is bayesplot version 1.7.2
- Online documentation and vignettes at mc-stan.org/bayesplot
- bayesplot theme set to bayesplot::theme_default()
   * Does _not_ affect other ggplot2 plots
   * See ?bayesplot_theme_set for details on theme setting
theme_set(bayesplot::theme_default(base_family = "sans"))
library(rstanarm)
Loading required package: Rcpp
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
This is rstanarm version 2.21.1
- See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
- Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
- For execution on a local, multicore CPU with excess RAM we recommend calling
  options(mc.cores = parallel::detectCores())

Attaching package: ‘rstanarm’

The following objects are masked from ‘package:caret’:

    compare_models, R2
options(mc.cores = parallel::detectCores())
library(loo)
This is loo version 2.3.1
- Online documentation and vignettes at mc-stan.org/loo
- As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session. 
library(projpred)
This is projpred version 1.1.6.
# Global seed for reproducibility (use `<-` for assignment, not `=`)
SEED <- 42
# Weakly-informative, heavy-tailed Student-t prior shared by the
# coefficients and the intercept
t_prior <- student_t(df = 7, location = 0, scale = 2.5)
# Full Bayesian logistic regression on all 64 predictors.
# The t prior regularizes the fit given only 4 positive cases among 271 rows.
# Use the shared SEED constant instead of repeating the literal 42.
post_full_coreset <- stan_glm(class ~ . , data = coreset_df,
                 family = binomial(link = "logit"),
                 prior = t_prior, prior_intercept = t_prior,
                 cores = 4, seed = SEED)
starting worker pid=18038 on localhost:11740 at 15:12:59.396
starting worker pid=18052 on localhost:11740 at 15:12:59.787
starting worker pid=18066 on localhost:11740 at 15:13:00.134
starting worker pid=18080 on localhost:11740 at 15:13:00.556

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.001639 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 16.39 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000175 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.75 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.000225 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 2.25 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.000175 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 1.75 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 7.87206 seconds (Warm-up)
Chain 4:                4.67815 seconds (Sampling)
Chain 4:                12.5502 seconds (Total)
Chain 4: 
Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 8.30372 seconds (Warm-up)
Chain 1:                7.77778 seconds (Sampling)
Chain 1:                16.0815 seconds (Total)
Chain 1: 
Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 8.08773 seconds (Warm-up)
Chain 2:                7.83582 seconds (Sampling)
Chain 2:                15.9235 seconds (Total)
Chain 2: 
Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 8.28147 seconds (Warm-up)
Chain 3:                7.70233 seconds (Sampling)
Chain 3:                15.9838 seconds (Total)
Chain 3: 
summary(post_full_coreset)

Model Info:
 function:     stan_glm
 family:       binomial [logit]
 formula:      class ~ .
 algorithm:    sampling
 sample:       4000 (posterior sample size)
 priors:       see help('prior_summary')
 observations: 271
 predictors:   65

Estimates:
              mean   sd    10%   50%   90%
(Intercept) -11.9    2.5 -15.2 -11.6  -8.9
Attr1        -0.2    2.7  -3.6  -0.2   3.1
Attr2         0.0    2.8  -3.4   0.0   3.5
Attr3         1.4    1.4  -0.3   1.4   3.2
Attr4        -0.4    2.8  -3.8  -0.3   3.0
Attr5         0.0    3.0  -3.7   0.0   3.6
Attr6         1.2    2.7  -2.1   1.0   4.5
Attr7         0.0    3.0  -3.6  -0.1   3.5
Attr8        -0.6    2.6  -3.9  -0.5   2.6
Attr9         0.4    1.9  -2.0   0.5   2.9
Attr10        0.1    2.8  -3.3   0.2   3.6
Attr11        0.2    2.1  -2.4   0.1   2.9
Attr12        0.4    2.7  -2.9   0.3   3.8
Attr13       -0.1    3.1  -3.9   0.0   3.6
Attr14       -0.1    2.9  -3.6  -0.1   3.4
Attr15       -1.0    1.6  -3.1  -1.1   1.1
Attr16       -1.2    2.4  -4.3  -1.1   1.8
Attr17       -0.6    2.5  -3.9  -0.5   2.5
Attr18       -0.1    3.1  -3.8   0.0   3.6
Attr19       -0.1    2.7  -3.5  -0.1   3.2
Attr20        1.0    2.8  -2.3   0.9   4.4
Attr21        0.0    3.0  -3.4   0.0   3.5
Attr22       -0.6    2.4  -3.6  -0.6   2.3
Attr23        0.1    2.8  -3.3   0.0   3.5
Attr24        1.8    2.3  -0.8   1.5   4.6
Attr25        0.6    2.5  -2.5   0.5   3.7
Attr26       -0.8    2.4  -3.7  -0.8   2.1
Attr27        0.0    3.1  -3.5   0.0   3.7
Attr28       -0.2    3.0  -3.7  -0.2   3.2
Attr29        1.2    1.0   0.0   1.2   2.5
Attr30        0.3    2.8  -3.0   0.3   3.7
Attr31       -0.2    2.9  -3.8  -0.1   3.4
Attr32        1.1    3.0  -2.5   0.9   4.7
Attr33       -1.3    2.7  -4.6  -1.0   1.8
Attr34        1.4    1.8  -0.8   1.4   3.6
Attr35       -1.2    2.0  -3.9  -1.2   1.3
Attr36       -1.8    1.8  -4.3  -1.6   0.4
Attr37       -1.0    2.6  -4.2  -0.8   2.0
Attr38       -0.2    2.9  -3.6  -0.2   3.3
Attr39       -0.1    2.8  -3.5   0.0   3.3
Attr40        0.3    2.6  -2.9   0.3   3.5
Attr41       -0.5    0.7  -1.3  -0.6   0.5
Attr42        0.1    2.8  -3.2   0.1   3.4
Attr43        0.5    2.8  -2.9   0.5   3.8
Attr44       -0.3    2.6  -3.6  -0.2   2.8
Attr45        0.1    3.0  -3.5   0.1   3.6
Attr46       -0.1    2.8  -3.5   0.0   3.3
Attr47        0.2    2.8  -3.2   0.2   3.5
Attr48        0.7    2.1  -1.9   0.6   3.4
Attr49        0.1    2.8  -3.2   0.1   3.5
Attr50       -0.4    2.2  -3.2  -0.3   2.4
Attr51        2.5    1.4   0.8   2.5   4.3
Attr52        1.0    2.8  -2.3   0.8   4.4
Attr53       -0.8    2.3  -3.8  -0.7   1.9
Attr54       -0.2    2.8  -3.5  -0.2   3.1
Attr55       -1.8    1.1  -3.3  -1.8  -0.4
Attr56        0.5    2.7  -2.8   0.4   3.9
Attr57       -0.4    1.9  -2.9  -0.3   2.0
Attr58        0.3    2.8  -3.1   0.3   3.8
Attr59       -0.8    2.9  -4.4  -0.6   2.6
Attr60       -0.1    2.9  -3.6  -0.1   3.2
Attr61       -0.9    2.4  -4.0  -0.7   2.0
Attr62        1.1    3.0  -2.4   0.9   4.7
Attr63       -1.2    2.7  -4.5  -1.0   2.1
Attr64       -0.4    2.9  -3.8  -0.4   3.0

Fit Diagnostics:
           mean   sd   10%   50%   90%
mean_PPD 0.0    0.0  0.0   0.0   0.0  

The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).

MCMC diagnostics
              mcse Rhat n_eff
(Intercept)   0.1  1.0  2178 
Attr1         0.0  1.0  5635 
Attr2         0.0  1.0  8509 
Attr3         0.0  1.0  2805 
Attr4         0.0  1.0  6429 
Attr5         0.0  1.0  7129 
Attr6         0.0  1.0  4202 
Attr7         0.0  1.0  6146 
Attr8         0.0  1.0  4713 
Attr9         0.0  1.0  3832 
Attr10        0.0  1.0  7036 
Attr11        0.0  1.0  3985 
Attr12        0.0  1.0  4648 
Attr13        0.0  1.0  7755 
Attr14        0.0  1.0  6985 
Attr15        0.0  1.0  4448 
Attr16        0.0  1.0  3133 
Attr17        0.0  1.0  5694 
Attr18        0.0  1.0  5717 
Attr19        0.0  1.0  6102 
Attr20        0.0  1.0  5243 
Attr21        0.0  1.0  5839 
Attr22        0.0  1.0  5311 
Attr23        0.0  1.0  5737 
Attr24        0.0  1.0  3037 
Attr25        0.0  1.0  4864 
Attr26        0.0  1.0  4338 
Attr27        0.0  1.0  6827 
Attr28        0.0  1.0  5539 
Attr29        0.0  1.0  3611 
Attr30        0.0  1.0  6520 
Attr31        0.0  1.0  5564 
Attr32        0.0  1.0  5555 
Attr33        0.0  1.0  5013 
Attr34        0.0  1.0  4016 
Attr35        0.0  1.0  4892 
Attr36        0.0  1.0  3357 
Attr37        0.0  1.0  4758 
Attr38        0.0  1.0  6104 
Attr39        0.0  1.0  5848 
Attr40        0.0  1.0  5272 
Attr41        0.0  1.0  2429 
Attr42        0.0  1.0  5358 
Attr43        0.0  1.0  5218 
Attr44        0.0  1.0  4659 
Attr45        0.0  1.0  5289 
Attr46        0.0  1.0  5811 
Attr47        0.0  1.0  6313 
Attr48        0.0  1.0  4475 
Attr49        0.0  1.0  5347 
Attr50        0.0  1.0  4523 
Attr51        0.0  1.0  2461 
Attr52        0.0  1.0  4553 
Attr53        0.0  1.0  4249 
Attr54        0.0  1.0  5930 
Attr55        0.0  1.0  3499 
Attr56        0.0  1.0  5630 
Attr57        0.0  1.0  3997 
Attr58        0.0  1.0  5521 
Attr59        0.0  1.0  6011 
Attr60        0.0  1.0  5062 
Attr61        0.0  1.0  5236 
Attr62        0.0  1.0  7156 
Attr63        0.0  1.0  4899 
Attr64        0.0  1.0  5580 
mean_PPD      0.0  1.0  4274 
log-posterior 0.2  1.0  1499 

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
prior_summary(post_full_coreset)
Priors for model 'post_full_coreset' 
------
Intercept (after predictors centered)
 ~ student_t(df = 7, location = 0, scale = 2.5)

Coefficients
 ~ student_t(df = [7,7,7,...], location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
------
See help('prior_summary.stanreg') for more details
pp_check(post_full_coreset, "dens_overlay")

pp_check(post_full_coreset, "stat")

Test now

bankruptcy_test <- read_csv('../data/bankruptcy_test_am.csv')
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.
# Drop the unnamed index column (read_csv filled it in as 'X1')
bankruptcy_test <- select(bankruptcy_test, -X1)
bankruptcy_test
# Test-set class balance (also imbalanced: 2105 vs 47)
table(bankruptcy_test$class)

   0    1 
2105   47 
# Feature matrix only — response removed before posterior_predict()
bankruptcy_test_x <- select(bankruptcy_test, -class)
dim(bankruptcy_test_x)
[1] 2152   64
test_pred <- posterior_predict(post_full_coreset, newdata = bankruptcy_test_x)
dim(test_pred)
[1] 4000 2152
hist(test_pred)

# Posterior mean predicted probability per test row
# (test_pred is draws x observations: 4000 x 2152)
pred <- colMeans(test_pred)
# Hard 0/1 labels at the 0.5 threshold
pr <- as.integer(pred >= 0.5)
table(pr)
pr
   0    1 
2139   13 
# Ground-truth labels from the held-out test set
true_pr <- bankruptcy_test$class
table(true_pr)
true_pr
   0    1 
2105   47 
table(true_pr, pr)
       pr
true_pr    0    1
      0 2093   12
      1   46    1
#' Posterior distribution of Bayesian R^2 for a stanreg fit.
#'
#' Returns one R^2 value per posterior draw: var(mu) / (var(mu) + sigma2),
#' where sigma2 is the Bernoulli residual variance mean(mu*(1-mu)) for
#' binomial models and the draws of sigma^2 otherwise.
bayes_R2 <- function(fit) {
  # posterior_epred() replaces the deprecated
  # posterior_linpred(..., transform = TRUE) (see the rstanarm warning)
  mupred <- rstanarm::posterior_epred(fit)
  var_mupred <- apply(mupred, 1, var)
  # The original also tested NCOL(y) == 1 against a GLOBAL y defined
  # outside this function — dropped so the function is self-contained
  if (family(fit)$family == "binomial") {
      sigma2 <- apply(mupred * (1 - mupred), 1, mean)
  } else {
      sigma2 <- as.matrix(fit, pars = c("sigma"))^2
  }
  var_mupred / (var_mupred + sigma2)
}
# Response vector (read as a global by bayes_R2's NCOL(y) check)
y <- coreset_df$class
# Median Bayesian R^2 across posterior draws (stored in bayesR2 as a side effect)
round(median(bayesR2<-bayes_R2(post_full_coreset)), 2)
Instead of posterior_linpred(..., transform=TRUE) please call posterior_epred(), which provides equivalent functionality.
[1] 0.58
pxl<-xlim(0,1)
# Histogram of the Bayesian R^2 posterior with its median marked.
# The "Scale for 'y' is already present" warning seen here is benign:
# scale_y_continuous(breaks=NULL) replaces the scale mcmc_hist already set.
mcmc_hist(data.frame(bayesR2), binwidth=0.02) + pxl +
    scale_y_continuous(breaks=NULL) +
    xlab('Bayesian R2') +
    geom_vline(xintercept=median(bayesR2))
Scale for 'y' is already present. Adding another scale for 'y', which will replace the
existing scale.

library(MLmetrics)

Attaching package: ‘MLmetrics’

The following objects are masked from ‘package:caret’:

    MAE, RMSE

The following object is masked from ‘package:base’:

    Recall
ConfusionMatrix(pr, true_pr)
      y_pred
y_true    0    1
     0 2093   12
     1   46    1
# NOTE(review): MLmetrics defaults `positive` to the first label ("0" here),
# so 0.978 is precision for the MAJORITY class — confirm whether
# Precision(true_pr, pr, positive = "1") was intended for this imbalanced task
Precision(true_pr, pr)
[1] 0.9784946
# NOTE(review): same caveat as Precision — 0.994 is recall for class "0";
# the confusion matrix shows class-1 recall would be 1/47
Recall(true_pr, pr)
[1] 0.9942993

Plotting and analysing

plot(post_full_coreset, "areas")

coef(post_full_coreset)
  (Intercept)         Attr1         Attr2         Attr3         Attr4         Attr5 
-1.159387e+01 -2.087937e-01  4.375479e-02  1.381348e+00 -3.403171e-01  1.576582e-02 
        Attr6         Attr7         Attr8         Attr9        Attr10        Attr11 
 1.024552e+00 -7.899878e-02 -5.134149e-01  4.549746e-01  1.586445e-01  1.495039e-01 
       Attr12        Attr13        Attr14        Attr15        Attr16        Attr17 
 3.083649e-01 -4.576755e-02 -6.845048e-02 -1.092996e+00 -1.080192e+00 -4.828847e-01 
       Attr18        Attr19        Attr20        Attr21        Attr22        Attr23 
-3.177100e-02 -8.394860e-02  8.560833e-01 -8.891445e-04 -5.672796e-01 -5.624404e-03 
       Attr24        Attr25        Attr26        Attr27        Attr28        Attr29 
 1.476013e+00  5.079452e-01 -7.561422e-01  1.222525e-02 -1.933720e-01  1.187093e+00 
       Attr30        Attr31        Attr32        Attr33        Attr34        Attr35 
 3.305826e-01 -1.048468e-01  8.724477e-01 -1.037943e+00  1.374382e+00 -1.158643e+00 
       Attr36        Attr37        Attr38        Attr39        Attr40        Attr41 
-1.643886e+00 -8.025335e-01 -1.700301e-01 -1.689906e-02  2.818221e-01 -5.967309e-01 
       Attr42        Attr43        Attr44        Attr45        Attr46        Attr47 
 5.361213e-02  4.562551e-01 -2.498703e-01  7.502764e-02 -1.703349e-02  1.811780e-01 
       Attr48        Attr49        Attr50        Attr51        Attr52        Attr53 
 5.527201e-01  9.417702e-02 -3.081719e-01  2.463733e+00  8.397935e-01 -6.800738e-01 
       Attr54        Attr55        Attr56        Attr57        Attr58        Attr59 
-2.268086e-01 -1.752727e+00  3.935452e-01 -2.584169e-01  3.493494e-01 -6.483292e-01 
       Attr60        Attr61        Attr62        Attr63        Attr64 
-9.284951e-02 -7.445959e-01  9.152790e-01 -1.018643e+00 -3.613087e-01 
posterior_interval(post_full_coreset)
                     5%         95%
(Intercept) -16.5819765 -8.25334727
Attr1        -4.6944816  4.25091821
Attr2        -4.3239556  4.55609703
Attr3        -0.7307162  3.72894171
Attr4        -4.8744037  3.98039460
Attr5        -4.8331070  4.83294700
Attr6        -3.0882285  5.82097875
Attr7        -4.6496339  4.63894533
Attr8        -4.9916277  3.56914225
Attr9        -2.7023824  3.63238631
Attr10       -4.3906836  4.63475729
Attr11       -3.0090301  3.69660666
Attr12       -3.8485979  5.06645190
Attr13       -5.1378647  4.79021074
Attr14       -4.8735712  4.65483123
Attr15       -3.5893669  1.73614905
Attr16       -5.3022113  2.60623610
Attr17       -5.0631693  3.32997329
Attr18       -5.1837027  4.90156905
Attr19       -4.5233812  4.29279433
Attr20       -3.3598899  5.64601174
Attr21       -4.4998512  4.63264020
Attr22       -4.6176297  3.16376657
Attr23       -4.3132488  4.68909028
Attr24       -1.3206109  5.88015354
Attr25       -3.3661084  4.72937436
Attr26       -4.6674899  2.89053127
Attr27       -4.7111418  4.78099682
Attr28       -5.0435758  4.53297016
Attr29       -0.3455968  2.88143317
Attr30       -3.8822444  4.96812770
Attr31       -4.8359562  4.42422322
Attr32       -3.5555204  6.25742879
Attr33       -5.8224550  2.72101330
Attr34       -1.5265292  4.21545005
Attr35       -4.6950274  2.00022730
Attr36       -4.9929539  0.93185739
Attr37       -5.4662661  2.89055281
Attr38       -4.8896395  4.40300141
Attr39       -4.7356877  4.57536114
Attr40       -3.9563963  4.52584355
Attr41       -1.4350350  0.85267110
Attr42       -4.3623511  4.45417932
Attr43       -3.9328080  4.96118556
Attr44       -4.5767794  3.81244108
Attr45       -4.6079412  4.88442936
Attr46       -4.7265358  4.36572434
Attr47       -4.1053944  4.63497728
Attr48       -2.6321474  4.27977621
Attr49       -4.2823683  4.60898125
Attr50       -4.1533470  3.16280355
Attr51        0.3160842  4.88517669
Attr52       -3.2753745  5.51773210
Attr53       -4.8797454  2.72847363
Attr54       -4.6567941  4.16568441
Attr55       -3.8132586 -0.08295383
Attr56       -3.8621260  5.04418630
Attr57       -3.7569650  2.61040989
Attr58       -4.1395681  4.96649195
Attr59       -5.7716735  3.72474766
Attr60       -4.8356731  4.36907349
Attr61       -5.0413434  2.84358910
Attr62       -3.5429171  6.14524241
Attr63       -5.7885882  2.99430012
Attr64       -4.9843490  4.20072189

Bayesian Variable Selection

vs_coreset <- varsel(post_full_coreset, method='forward')
Instead of posterior_linpred(..., transform=TRUE) please call posterior_epred(), which provides equivalent functionality.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
glm_ridge warning: maximum number of line search iterations reached. The optimization can be ill-behaved.
vs_coreset$vind
Attr55 Attr43 Attr10 Attr29 Attr34 Attr41 Attr62 Attr15 Attr64 Attr25 Attr35 Attr51  Attr2 
    55     43     10     29     34     41     62     15     64     25     35     51      2 
 Attr3 Attr30 Attr40 Attr13 Attr60 Attr24 Attr61 
     3     30     40     13     60     24     61 

loo

# Previously fitted model saved from an earlier session
lpost1 <- readRDS("../model/post1.rds")
# NOTE(review): this call errored ("subscript out of bounds" on the worker
# cores — see output below); possibly a multicore issue — try
# options(mc.cores = 1) or passing cores = 1, and verify lpost1 loaded correctly
loo(lpost1, post_full_coreset)
all scheduled cores encountered errors in user codeError in FUN(X[[i]], ...) : subscript out of bounds

Apply neural network to Coreset

Pre-process - scale data (min-max normalization)

training_data <- read_csv('../data/coreset_train.csv')
Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.
head(training_data)
test_data <- select(read_csv('../data/bankruptcy_test_am.csv'), -X1)
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.
head(test_data)
#' Min-max scale a numeric vector to [0, 1].
#'
#' Fixes two defects in the original: a constant column divided by zero
#' (producing NaN for every entry), and a single NA poisoned the whole
#' column because min()/max() were called without na.rm.
normalize <- function(x) {
  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  if (span == 0) {
    # Constant column: map to 0 rather than NaN
    return(rep(0, length(x)))
  }
  (x - rng[1]) / span
}
# Scale BOTH sets with statistics computed from the training data only.
# The original normalized the test set with its own per-column min/max,
# which leaks test information and puts test features on a different
# scale from the one the network was trained on.
train_ranges <- lapply(training_data, function(x) range(x, na.rm = TRUE))
scale_with <- function(x, r) (x - r[1]) / (r[2] - r[1])
training_data_pp <- as.data.frame(Map(scale_with, training_data, train_ranges))
test_data_pp <- as.data.frame(Map(scale_with, test_data,
                                  train_ranges[names(test_data)]))

Neural Network Classifier

library('neuralnet')
# Feed-forward net with four hidden layers (32/16/8/4); the activation is
# applied at the output node too (linear.output = FALSE), for classification
nn <- neuralnet(class ~ ., data=training_data_pp, hidden=c(32,16,8,4), linear.output=FALSE, threshold=0.01)
plot(nn)

# Network predictions on the (scaled) test set
nn.results <- compute(nn, test_data_pp)
results <- data.frame(actual = test_data_pp$class, prediction = nn.results$net.result)
results
# Round network outputs to hard 0/1 labels
roundedresults <- sapply(results, round, digits = 0)
roundedresultsdf <- data.frame(roundedresults)
# Avoid attach() — it masks the search path (see the masking messages this
# produced). Bind the two columns explicitly instead, so the later
# table(actual, prediction) call still works unchanged.
actual <- roundedresultsdf$actual
prediction <- roundedresultsdf$prediction
The following objects are masked from roundedresultsdf (pos = 3):

    actual, prediction

The following objects are masked from roundedresultsdf (pos = 4):

    actual, prediction

The following objects are masked from roundedresultsdf (pos = 13):

    actual, prediction
table(actual,prediction)
      prediction
actual    0
     0 2105
     1   47
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmNvcmVzZXRfZGYgPC0gcmVhZC5jc3YoJy4uL2RhdGEvY29yZXNldF90cmFpbi5jc3YnKQpjb3Jlc2V0X2RmCmBgYAoKYGBge3J9CnRhYmxlKGNvcmVzZXRfZGYkY2xhc3MpCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShjYXJldCkKbGlicmFyeShHR2FsbHkpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShjb3JycGxvdCkKbGlicmFyeShiYXllc3Bsb3QpCnRoZW1lX3NldChiYXllc3Bsb3Q6OnRoZW1lX2RlZmF1bHQoYmFzZV9mYW1pbHkgPSAic2FucyIpKQpsaWJyYXJ5KHJzdGFuYXJtKQpvcHRpb25zKG1jLmNvcmVzID0gcGFyYWxsZWw6OmRldGVjdENvcmVzKCkpCmxpYnJhcnkobG9vKQpsaWJyYXJ5KHByb2pwcmVkKQpTRUVEPTQyCmBgYAoKCmBgYHtyfQp0X3ByaW9yIDwtIHN0dWRlbnRfdChkZiA9IDcsIGxvY2F0aW9uID0gMCwgc2NhbGUgPSAyLjUpCmBgYAoKYGBge3J9CnBvc3RfZnVsbF9jb3Jlc2V0IDwtIHN0YW5fZ2xtKGNsYXNzIH4gLiAsIGRhdGEgPSBjb3Jlc2V0X2RmLAogICAgICAgICAgICAgICAgIGZhbWlseSA9IGJpbm9taWFsKGxpbmsgPSAibG9naXQiKSwgCiAgICAgICAgICAgICAgICAgcHJpb3IgPSB0X3ByaW9yLCBwcmlvcl9pbnRlcmNlcHQgPSB0X3ByaW9yLAogICAgICAgICAgICAgICAgIGNvcmVzPTQsIHNlZWQgPSA0MikKYGBgCgpgYGB7cn0Kc3VtbWFyeShwb3N0X2Z1bGxfY29yZXNldCkKYGBgCgpgYGB7cn0KcHJpb3Jfc3VtbWFyeShwb3N0X2Z1bGxfY29yZXNldCkKYGBgCgoKYGBge3J9CnBwX2NoZWNrKHBvc3RfZnVsbF9jb3Jlc2V0LCAiZGVuc19vdmVybGF5IikKYGBgCgpgYGB7cn0KcHBfY2hlY2socG9zdF9mdWxsX2NvcmVzZXQsICJzdGF0IikKYGBgCgojIyBUZXN0IG5vdwoKYGBge3J9CmJhbmtydXB0Y3lfdGVzdCA8LSByZWFkX2NzdignLi4vZGF0YS9iYW5rcnVwdGN5X3Rlc3RfYW0uY3N2JykKYmFua3J1cHRjeV90ZXN0IDwtIHNlbGVjdChiYW5rcnVwdGN5X3Rlc3QsIC1YMSkKYmFua3J1cHRjeV90ZXN0CmBgYApgYGB7cn0KdGFibGUoYmFua3J1cHRjeV90ZXN0JGNsYXNzKQpgYGAKCgpgYGB7cn0KYmFua3J1cHRjeV90ZXN0X3ggPC0gc2VsZWN0KGJhbmtydXB0Y3lfdGVzdCwgLWNsYXNzKQpkaW0oYmFua3J1cHRjeV90ZXN0X3gpCmBgYAoKCmBgYHtyfQp0ZXN0X3ByZWQgPC0gcG9zdGVyaW9yX3ByZWRpY3QocG9zdF9mdWxsX2NvcmVzZXQsIG5ld2RhdGEgPSBiYW5rcnVwdGN5X3Rlc3RfeCkKYGBgCgpgYGB7cn0KZGltKHRlc3RfcHJlZCkKaGlzdCh0ZXN0X3ByZWQpCmBgYAoKYGBge3J9CnByZWQgPC0gY29sTWVhbnModGVzdF9wcmVkKQpwciA8LSBhcy5pbnRlZ2VyKHByZWQgPj0gMC41KQpgYGAKCmBgYHtyfQp0YWJsZShwcikKYGBgCgpgYGB7cn0KdHJ1ZV9wciA8LSBiYW5rcnVwdGN5X3Rlc3QkY2xhc3MKdGFibGUodHJ1ZV9wcikKYGBgCgpgYGB7cn0KdGFibGUo
dHJ1ZV9wciwgcHIpCmBgYAoKYGBge3J9CmJheWVzX1IyIDwtIGZ1bmN0aW9uKGZpdCkgewogIG11cHJlZCA8LSByc3RhbmFybTo6cG9zdGVyaW9yX2xpbnByZWQoZml0LCB0cmFuc2Zvcm0gPSBUUlVFKQogIHZhcl9tdXByZWQgPC0gYXBwbHkobXVwcmVkLCAxLCB2YXIpCiAgaWYgKGZhbWlseShmaXQpJGZhbWlseSA9PSAiYmlub21pYWwiICYmIE5DT0woeSkgPT0gMSkgewogICAgICBzaWdtYTIgPC0gYXBwbHkobXVwcmVkKigxLW11cHJlZCksIDEsIG1lYW4pCiAgfSBlbHNlIHsKICAgICAgc2lnbWEyIDwtIGFzLm1hdHJpeChmaXQsIHBhcnMgPSBjKCJzaWdtYSIpKV4yCiAgfQogIHZhcl9tdXByZWQgLyAodmFyX211cHJlZCArIHNpZ21hMikKfQpgYGAKCgpgYGB7cn0KeSA8LSBjb3Jlc2V0X2RmJGNsYXNzCnJvdW5kKG1lZGlhbihiYXllc1IyPC1iYXllc19SMihwb3N0X2Z1bGxfY29yZXNldCkpLCAyKQpgYGAKCmBgYHtyfQpweGw8LXhsaW0oMCwxKQptY21jX2hpc3QoZGF0YS5mcmFtZShiYXllc1IyKSwgYmlud2lkdGg9MC4wMikgKyBweGwgKwogICAgc2NhbGVfeV9jb250aW51b3VzKGJyZWFrcz1OVUxMKSArCiAgICB4bGFiKCdCYXllc2lhbiBSMicpICsKICAgIGdlb21fdmxpbmUoeGludGVyY2VwdD1tZWRpYW4oYmF5ZXNSMikpCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KE1MbWV0cmljcykKYGBgCgpgYGB7cn0KQ29uZnVzaW9uTWF0cml4KHByLCB0cnVlX3ByKQpgYGAKCmBgYHtyfQpQcmVjaXNpb24odHJ1ZV9wciwgcHIpCmBgYAoKYGBge3J9ClJlY2FsbCh0cnVlX3ByLCBwcikKYGBgCgojIyBQbG90dGluZyBhbmQgYW5hbHlzaW5nCmBgYHtyfQpwbG90KHBvc3RfZnVsbF9jb3Jlc2V0LCAiYXJlYXMiKQpgYGAKCmBgYHtyfQpjb2VmKHBvc3RfZnVsbF9jb3Jlc2V0KQpgYGAKCmBgYHtyfQpwb3N0ZXJpb3JfaW50ZXJ2YWwocG9zdF9mdWxsX2NvcmVzZXQpCmBgYAoKIyMgQmF5ZXNpYW4gVmFyaWFibGUgU2VsZWN0aW9uCmBgYHtyfQp2c19jb3Jlc2V0IDwtIHZhcnNlbChwb3N0X2Z1bGxfY29yZXNldCwgbWV0aG9kPSdmb3J3YXJkJykKYGBgCgpgYGB7cn0KdnNfY29yZXNldCR2aW5kCmBgYAoKCiMjIGxvbwoKYGBge3J9Cmxwb3N0MSA8LSByZWFkUkRTKCIuLi9tb2RlbC9wb3N0MS5yZHMiKQpgYGAKCmBgYHtyfQpsb28obHBvc3QxLCBwb3N0X2Z1bGxfY29yZXNldCkKYGBgCiMjIEFwcGx5IG5ldXJhbCBuZXR3b3JrIHRvIENvcmVzZXQKCiMjIyBQcmUtcHJvY2VzcyAtIGZhY3RvciBhbmQgc2NhbGUgZGF0YQoKCgpgYGB7cn0KdHJhaW5pbmdfZGF0YSA8LSByZWFkX2NzdignLi4vZGF0YS9jb3Jlc2V0X3RyYWluLmNzdicpCmhlYWQodHJhaW5pbmdfZGF0YSkKYGBgCmBgYHtyfQp0ZXN0X2RhdGEgPC0gc2VsZWN0KHJlYWRfY3N2KCcuLi9kYXRhL2JhbmtydXB0Y3lfdGVzdF9hbS5jc3YnKSwgLVgxKQpoZWFkKHRlc3RfZGF0YSkKYGBgCgoKCmBgYHtyfQpub3JtYWxpemUgPC0gZnVuY3Rpb24oeCkgewogIHJldHVybiAoKHggLSBtaW4oeCkp
IC8gKG1heCh4KSAtIG1pbih4KSkpCn0KYGBgCgoKYGBge3J9CnRyYWluaW5nX2RhdGFfcHAgPC0gYXMuZGF0YS5mcmFtZShsYXBwbHkodHJhaW5pbmdfZGF0YSwgbm9ybWFsaXplKSkKdGVzdF9kYXRhX3BwIDwtIGFzLmRhdGEuZnJhbWUobGFwcGx5KHRlc3RfZGF0YSwgbm9ybWFsaXplKSkKCmBgYAoKCgoKIyMgTmV1cmFsIE5ldHdvcmsgQ2xhc3NpZmllcgpgYGB7cn0KbGlicmFyeSgnbmV1cmFsbmV0JykKYGBgCgoKCmBgYHtyfQpubiA8LSBuZXVyYWxuZXQoY2xhc3MgfiAuLCBkYXRhPXRyYWluaW5nX2RhdGFfcHAsIGhpZGRlbj1jKDMyLDE2LDgsNCksIGxpbmVhci5vdXRwdXQ9RkFMU0UsIHRocmVzaG9sZD0wLjAxKQpwbG90KG5uKQpgYGAKCgpgYGB7cn0Kbm4ucmVzdWx0cyA8LSBjb21wdXRlKG5uLCB0ZXN0X2RhdGFfcHApCnJlc3VsdHMgPC0gZGF0YS5mcmFtZShhY3R1YWwgPSB0ZXN0X2RhdGFfcHAkY2xhc3MsIHByZWRpY3Rpb24gPSBubi5yZXN1bHRzJG5ldC5yZXN1bHQpCnJlc3VsdHMKYGBgCgpgYGB7cn0Kcm91bmRlZHJlc3VsdHM8LXNhcHBseShyZXN1bHRzLHJvdW5kLGRpZ2l0cz0wKQpyb3VuZGVkcmVzdWx0c2RmPWRhdGEuZnJhbWUocm91bmRlZHJlc3VsdHMpCmF0dGFjaChyb3VuZGVkcmVzdWx0c2RmKQp0YWJsZShhY3R1YWwscHJlZGljdGlvbikKYGBgCgoKYGBge3J9CmBgYAoKCmBgYHtyfQpgYGAKCgpgYGB7cn0KYGBgCgo=